********************************************************************************
* Replication file for Cui (2020 JEEM)
* "Climate change and adaptation in agriculture: Evidence from US cropping patterns"
*
* Step 1.3: Variable construction
********************************************************************************

* set path
*   family : defined here but not referenced again in the visible part of this file
*            (presumably the project root -- confirm)
*   data   : directory holding the .dta file read below
	
	global family "..."
	global data  ".../dta"

* load data: merged county-level data
*   The file name is wrapped in double quotes so that a data path containing
*   spaces is passed to -use- as a single file name.

	use "$data/merged_clim_ag", clear
	
* time trend: years elapsed since 1980 and its square (left missing when year <= 1980)
	generate t  = year - 1980 if year > 1980
	generate t2 = t^2

* weather scaling: divide prec by 100, then form the squared terms
	quietly replace  prec    = prec / 100
	quietly generate prec_sq = prec^2
	generate tAvg_sq = tAvg^2
	
* gen decades-long weather averages
*   For each weather variable v and each window W in 15, 20, 25, 30 the variable
*   ma<W>_<v> is the mean of lags 1 through W of v (the current year is excluded).
*   It is built as a running mean: after step k it holds the mean of lags 1..k.
*   A missing value in any of those lags propagates to the result.
	xtset fips year
	local wvars tAvg prec dbin8 dbin11 dbin14 dbin17 dbin20 dbin23 dbin26 dbin29 dbin32 dbin32plus
	foreach v of local wvars {
		forvalues win = 15(5)30 {
			local ma ma`win'_`v'
			quietly generate `ma' = L.`v'
			forvalues k = 2/`win' {
				quietly replace `ma' = 1/`k'*((`k'-1)*`ma' + L`k'.`v')
			}
		}
	}
		
* planted years for corn OR soy
*   Within county, sum count over observations where corn or soy acreage is
*   non-missing; observations where both are missing are then set to 0.
	egen pl_yrs_cs = total(count) if !(pl_acre_corn == . & pl_acre_soy == .), by(fips)
	replace pl_yrs_cs = 0 if pl_yrs_cs >= .
		
* log yields: log(yield + 1) for each of the four crops
	summarize yield*
	foreach crop in corn soy cotton wheat {
		generate l_yield_`crop' = log(yield_`crop' + 1)
	}
	
* log planted acres: corn and soy separately, then corn + soy combined
	foreach crop in corn soy {
		generate l_pl_acre_`crop' = log(pl_acre_`crop')
	}
	generate l_pl_acre_corn_soy = log(pl_acre_corn + pl_acre_soy)
	* when one of the two crops is missing, fall back to the log of the other crop
	replace l_pl_acre_corn_soy = l_pl_acre_corn if pl_acre_soy == .
	replace l_pl_acre_corn_soy = l_pl_acre_soy if pl_acre_corn == .

* log planted acres: wheat and cotton
	foreach crop in wheat cotton {
		generate l_pl_acre_`crop' = log(pl_acre_`crop')
	}

* log total cropland
	generate l_acres_cropland = log(acres_cropland)

* acreage levels: exponentiate the combined log, then take the county mean
	generate pl_acre_corn_soy = exp(l_pl_acre_corn_soy)
	egen pl_acre_corn_soy_avg = mean(pl_acre_corn_soy), by(fips)
	
* region selection
*   Drops every observation in the first list of states, and observations in the
*   second pair of states that lie west of longitude -106.
*   NOTE(review): assuming stateansi holds ANSI/FIPS state codes, the first list is
*   AZ, CA, ID, NV, OR, UT, WA, WY and the second pair is CO, NM -- confirm.
	drop if inlist(stateansi, 4, 6, 16, 32, 41, 49, 53, 56)
	drop if (stateansi == 8 | stateansi == 35) & longitude < -106
	
* define T-zones
*   Bin the 1981 value of ma30_tAvg into width-2 bins labelled by their odd lower
*   bound, pool the tails into 13 and 25, and spread the label to all years of
*   the county.
	generate T_marker = 2*floor((ma30_tAvg - 1)/2) + 1 if year == 1981
	replace T_marker = 13 if T_marker < 13
	replace T_marker = 25 if T_marker > 25 & T_marker < .
	egen T_group = mean(T_marker), by(fips)

* define P-zones
*   Floor of the 1981 value of ma30_prec; values at or below 2 are pooled into 2
*   and the value 9 is pooled into 8. The label is then spread to all years of
*   the county.
	generate P_marker = floor(ma30_prec) if year == 1981
	replace P_marker = 2 if P_marker <= 2
	replace P_marker = 8 if P_marker == 9
	egen P_group = mean(P_marker), by(fips)

* tab T/P zones among observations with a positive planted-years count
	tabulate T_group if pl_yrs_cs > 0	// zones: 7-by-7
	tabulate P_group if pl_yrs_cs > 0	// zones: 7-by-7
	
* log of the previous-year price, used as the price for the current year
	xtset fips year
	foreach crop in corn soy {
		generate lprice_`crop'_lag = log(L.price_dfl_`crop')
	}
	* a missing soy lag is set to 0 whenever the corn lag is non-missing
	replace lprice_soy_lag = 0 if lprice_corn_lag != . & lprice_soy_lag == .
	
* generate share of planted acres
* logs of planted acres for the remaining crops
	generate l_pl_acre_sprwht  = log(pl_acre_sprwht)
	generate l_pl_acre_barley  = log(pl_acre_barley)
	generate l_pl_acre_sorghum = log(pl_acre_sorghum)

* corn + soy planted acres; when one crop is missing use the other one alone
	generate pl_acre_cs = pl_acre_corn + pl_acre_soy
	replace pl_acre_cs = pl_acre_corn if pl_acre_soy == .
	replace pl_acre_cs = pl_acre_soy if pl_acre_corn == .
	summarize pl_acre_cs

* generate relative acreage shares
* missing acreage after 1980 is treated as zero before the shares are formed
	foreach crop in cs barley sprwht wheat sorghum cotton {
		replace pl_acre_`crop' = 0 if year > 1980 & pl_acre_`crop' == .
	}

* share of corn + soy in (corn + soy + other crop), one variable per other crop
	local suffixes bl wt swt sg ct
	local crops    barley wheat sprwht sorghum cotton
	forvalues i = 1/5 {
		local sfx  : word `i' of `suffixes'
		local crop : word `i' of `crops'
		generate shr_cs_cs`sfx' = pl_acre_cs / (pl_acre_cs + pl_acre_`crop')
	}
	
* label major areas: spread the 1981 value of pl_acre_cs to all years of the county
	generate pl_acre_cs_marker = pl_acre_cs if year == 1981
	egen pl_acre_cs_1981 = mean(pl_acre_cs_marker), by(fips)

* other vars in log
	generate l_pop      = log(pop)
	generate l_crp_area = log(crp_area + 1)

* insured area: missing is set to zero, then log(area + 1)
	local insured_crops corn soy
	foreach crop of local insured_crops {
		replace areaInsured_`crop' = 0 if areaInsured_`crop' == .
		generate l_areaInsured_`crop' = log(areaInsured_`crop' + 1)
	}
	
* rescale T P variables
*   Moving averages and current-year tAvg and prec are multiplied by 10.
*   lag_tAvg is multiplied by 10 and lag_prec is divided by 10.
*   NOTE(review): prec was divided by 100 earlier in this file, so dividing lag_prec
*   by 10 presumably puts it on the same scale as prec -- confirm.
*   NOTE(review): tAvg_sq and prec_sq are not rescaled here.
	foreach win of numlist 15(5)30 {
		foreach v in tAvg prec {
			replace ma`win'_`v' = 10 * ma`win'_`v'
		}
	}
	replace tAvg = 10 * tAvg
	replace prec = 10 * prec

	replace lag_tAvg = 10 * lag_tAvg
	replace lag_prec = lag_prec / 10
		
* calculate within-county variability
*   Within-county standard deviations over the sample with year after 1980,
*   irr_ratio below 0.1 and pl_yrs_cs of at least 20.
	local insample year > 1980 & irr_ratio < 0.1 & pl_yrs_cs >= 20

* climate normals
	foreach v in ma30_tAvg ma30_prec {
		egen withinsd_`v' = sd(`v') if `insample', by(fips)
	}
	summarize withinsd_ma30*

* log acreage outcomes, further restricted to non-missing l_acres_cropland
	foreach v in l_acres_cropland l_pl_acre_corn_soy {
		egen withinsd_`v' = sd(`v') if `insample' & l_acres_cropland != ., by(fips)
	}
	summarize withinsd_l*
	
	
* save data
*   Writes the constructed dataset, overwriting any existing file of that name.
*   The file name is wrapped in double quotes so that a data path containing
*   spaces is passed to -save- as a single file name.
	save "$data/dataForReg", replace
